Analysis of NO2 emission from shipping¶

Import modules¶

In [66]:
## Modules for data loading
import os
import copy
import netCDF4 as nc
import pandas as pd
import datetime

## Modules for statistical analysis & wrangling
import numpy as np
from scipy import stats
import astropy.convolution.convolve as convolve
from scipy.signal import convolve2d
import spreg
import statsmodels.tsa.seasonal

## Modules for clustering
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist
from kneed import KneeLocator

## Modules for mapping & plotting
import cartopy as cart
import cartopy.crs as ccrs
import cartopy.io.shapereader
from shapely import geometry
from shapely.ops import unary_union
from shapely.prepared import prep
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns

Define functions¶

In [67]:
# Gauss circle function
def gauss_circle(r=5):
    arr = np.zeros((2 * r + 1, 2 * r + 1), dtype=int)
    for row in range(arr.shape[0]):
        for col in range(arr.shape[1]):
            if np.sqrt(np.square(row - r) + np.square(col - r)) <= r:
                arr[row, col] = 1
    return(arr)

# ConvGistar function
def ConvGistar(input_array, kernel=gauss_circle()):
    ConvGistar = np.empty(input_array.shape)
    Xbar = np.nanmean(input_array, axis=(1, 2))
    n = input_array.shape[1]*input_array.shape[2]
    for i in range(input_array.shape[0]):
        ConvGistar[i] = (convolve(input_array[i], kernel, boundary='fill', fill_value=np.nan, preserve_nan=True, normalize_kernel=False) - Xbar[i]*np.nansum(kernel)) / (np.sqrt(convolve(np.square(input_array[i]), kernel, boundary='fill', fill_value=np.nan, preserve_nan=True, normalize_kernel=False) - np.square(Xbar[i])) * np.sqrt((n*np.nansum(np.square(kernel)) - np.square(np.nansum(kernel))) / (n - 1)))
    ConvGistar[np.isnan(input_array) == True] = np.nan
    return ConvGistar

# is_land function
def is_land(land, lon, lat):
    return land.contains(geometry.Point(lon, lat))

Load dataset¶

Load TROPOMI dataset: NO2 VCD

In [68]:
## Load TROPOMI dataset (netCDF)
no2_nc_filename = "./dataset/cube_no2_shipping_RedSea.nc"

## Read netCDF file
ds = nc.Dataset(no2_nc_filename, 'r')

## Store dimension information (time, latitude, and longitude)
dim_names = list(ds.dimensions) # get variable keys
time, lats, lons = ds.variables[dim_names[0]][:], ds.variables[dim_names[1]][:], ds.variables[dim_names[2]][:]
print("Dimensions of netCDF NO2 data:")
for dim in ds.dimensions.items():
    print(dim)

## Store NO2 column measurements from netCDF dataset
product = ds['/PRODUCT']
no2_vcd_attr = list(product.variables.items())
no2_vcd_name = no2_vcd_attr[0][0]
no2_vcd = product.variables[no2_vcd_name][:len(time)] # vertical column

## Close netCDF file
ds.close()

## Crop the dataset and dimension w.r.t. the region of interest
## If interested in a smaller ROI, change lats_roi = (0, lats_roi), and lons_roi = (0, lons_roi) where lats_roi <= lats.size and lons_roi <= lons.size
lats_roi = (0, lats.size)
lons_roi = (0, lons.size)
lats, lons = lats[lats_roi[0]:lats_roi[1]], lons[lons_roi[0]:lons_roi[1]]
no2_vcd = no2_vcd[:, lats_roi[0]:lats_roi[1], lons_roi[0]:lons_roi[1]]
extent = [min(lons), max(lons), min(lats), max(lats)] # coordinate extent of given netCDF dataset

## only run when Indian Ocean
# no2_scd[330, :, 160:] = np.nan ## Part of Indian Ocean dataset on 29 October 2019 should be masked due to large noise

del dim_names, dim, ds, product, no2_vcd_attr, no2_vcd_name
Dimensions of netCDF NO2 data:
('Time', <class 'netCDF4._netCDF4.Dimension'> (unlimited): name = 'Time', size = 1072)
('Latitude', <class 'netCDF4._netCDF4.Dimension'>: name = 'Latitude', size = 320)
('Longitude', <class 'netCDF4._netCDF4.Dimension'>: name = 'Longitude', size = 320)

Load shipping frequency dataset

In [69]:
## Load shipping frequency dataset (npy)
ship_freq_npy_filename = "./dataset/ship_freq_RedSea.npy" # Using ship track count data from Halpern, B. et al. (2015), regridded on the same WGS84 coordinate
ship_freq = np.load(ship_freq_npy_filename)[::-1, :][lats_roi[0]:lats_roi[1], lons_roi[0]:lons_roi[1]]
ship_freq = ship_freq/np.nanmax(ship_freq) # normalization; range of ship track data: [0, max. of ship_freq)

del ship_freq_npy_filename

Filter out the land coordinates from NO2 VCD

In [70]:
# Build ocean_mask
x, y = np.array(lons), np.array(lats)
xx, yy = np.meshgrid(x, y, sparse = True)
ocean_mask = np.empty([len(lats), len(lons)])
land = prep(unary_union(list(cartopy.io.shapereader.Reader(cartopy.io.shapereader.natural_earth(resolution='10m', category='physical', name='land')).geometries())))
for i in range(len(lons)):
    for j in range(len(lats)):
        ocean_mask[j, i] = not is_land(land, x[i], y[j])

# Replace land coordinate with NaN
ocean_mask[ocean_mask==0] = np.nan
print("Check on ocean_mask with NaN (1. if ocean, np.nan if land):\n", ocean_mask)
no2_vcd = np.multiply(np.array(copy.deepcopy(no2_vcd)[:, :, :].data), ocean_mask)
Check on ocean_mask with NaN (1. if ocean, np.nan if land):
 [[ 1.  1.  1. ... nan nan nan]
 [ 1.  1.  1. ... nan nan nan]
 [ 1.  1.  1. ... nan nan nan]
 ...
 [nan nan nan ...  1.  1.  1.]
 [nan nan nan ...  1.  1.  1.]
 [nan nan nan ...  1.  1.  1.]]

Plot map¶

Adjust plotting parameters

In [71]:
## Customize parameters for plotting
## Parameters for publication (high resolution)
plt.style.use('ggplot')
plt.rcParams['font.size'] = 13.5
plt.rcParams['xtick.labelsize'], plt.rcParams['ytick.labelsize'] = 13.5, 13.5
plt.rcParams['text.color'] = 'black'
plt.rcParams['axes.labelcolor'] = 'black'
plt.rcParams['xtick.color'] = 'black'
plt.rcParams['ytick.color'] = 'black'
fig_size = [abs(extent[1] - extent[0]), abs(extent[3] - extent[2])]

Plot base map

In [72]:
## Plot base map
ax = plt.axes(projection=ccrs.PlateCarree())
ax.add_feature(cart.feature.LAND, zorder=1, facecolor='#edc9af', alpha=1)
ax.add_feature(cart.feature.COASTLINE, linewidth=1, edgecolor='black')
ax.gridlines(draw_labels=True, linestyle='--')
ax.set_extent([extent[0] - 1, extent[1] + 1, extent[2] - 1, extent[3] + 1])

## Add a rectangle patch and an annotation
ax.add_patch(matplotlib.patches.Rectangle((extent[0], extent[2]), fig_size[0], fig_size[1], linewidth=1, edgecolor='r', facecolor='none'))
ax.annotate('Region of Interest', xy=(extent[0], extent[3]), xytext=(extent[0]*1.05, extent[3]*0.90),
            arrowprops=dict(facecolor='red', shrink=0.05))
plt.show()

Plot ocean coordinates only

In [73]:
## Plot base map
ax = plt.axes(projection=ccrs.PlateCarree())
ax.add_feature(cart.feature.LAND, zorder=1, facecolor='#edc9af', alpha=1)
ax.add_feature(cart.feature.COASTLINE, linewidth=1, edgecolor='black')
ax.gridlines(draw_labels=True, linestyle='--')

## Plot ocean-coordinates only
plt.imshow(ocean_mask, extent=[extent[0], extent[1], extent[2], extent[3]], cmap='viridis')
plt.show()

Plot true shipping frequency

In [74]:
## Plot base map
ax = plt.axes(projection=ccrs.PlateCarree())
ax.add_feature(cart.feature.LAND, zorder=1, facecolor='#edc9af', alpha=1)
ax.add_feature(cart.feature.COASTLINE, linewidth=1, edgecolor='black')
ax.gridlines(draw_labels=True, linestyle='--')

## Plot true shipping frequency
plt.imshow(ship_freq, extent=[extent[0], extent[1], extent[2], extent[3]], cmap='viridis')
plt.colorbar(matplotlib.cm.ScalarMappable(cmap='viridis'),
             fraction=0.0426, pad = 0.175, extend='neither', ax=plt.gca(),
             ticks=np.linspace(np.floor(np.nanmin(ship_freq) * 100) / 100, np.ceil(np.nanmax(ship_freq) * 100) / 100, 11),
             label='Normalized ship track count').outline.set_edgecolor(None)
plt.show()

Plot daily measurement data

In [75]:
## Plot base map
ax = plt.axes(projection=ccrs.PlateCarree())
ax.add_feature(cart.feature.LAND, zorder=1, facecolor='#edc9af', alpha=1)
ax.add_feature(cart.feature.COASTLINE, linewidth=1, edgecolor='black')
ax.gridlines(draw_labels=True, linestyle='--')

## Plot daily measurement data
plt.imshow(copy.deepcopy(no2_vcd)[888, :, :], # May 11th 2021
           cmap='viridis', extent=[extent[0], extent[1], extent[2], extent[3]], vmin=0, vmax=0.00005)
plt.colorbar(cmap='viridis', fraction=0.0426, pad=0.175, extend='both',
             label='NO$_{2}$ vertical column (mol m$^{-2}$)').outline.set_edgecolor(None)
plt.show()

Plot average measurement data over entire period

In [76]:
## Plot base map
ax = plt.axes(projection=ccrs.PlateCarree())
ax.add_feature(cart.feature.LAND, zorder=1, facecolor='#edc9af', alpha=1)
ax.add_feature(cart.feature.COASTLINE, linewidth=1, edgecolor='black')
ax.gridlines(draw_labels=True, linestyle='--')

## Plot average measurement data over entire period
plt.imshow(np.nanmean(copy.deepcopy(no2_vcd)[:, :, :], axis = 0),
           cmap='viridis', extent=[extent[0], extent[1], extent[2], extent[3]], vmin=0, vmax=0.00005)
plt.colorbar(cmap='viridis', fraction=0.0426, pad=0.175, extend='both',
             label='NO$_{2}$ vertical column (mol m$^{-2}$)').outline.set_edgecolor(None)
plt.show()

Perform statistical analysis

In [77]:
## Prepare data
NO2 = np.nanmean(copy.deepcopy(no2_vcd)[:, :, :], axis=0).reshape(-1, 1)
SHIP_TRACK = copy.deepcopy(ship_freq).reshape(-1, 1)
mask = ~np.isnan(NO2) & ~np.isnan(SHIP_TRACK)
X = NO2[mask].reshape(-1, 1)
y = SHIP_TRACK[mask]

## Pearson correlation
print('Spatial correlation between NO2 column level & ship track counts:', stats.pearsonr(NO2[mask], SHIP_TRACK[mask]))

# Fit OLS model
m1 = spreg.OLS(
    # Dependent variable
    y,
    # Independent variables
    X,
    # Dependent variable name
    name_y='Ship track', 
    # Independent variable name
    name_x=['Original NO2']
)
print(m1.summary)
Spatial correlation between NO2 column level & ship track counts: PearsonRResult(statistic=0.1508017057311495, pvalue=3.0986167591790774e-138)
REGRESSION
----------
SUMMARY OF OUTPUT: ORDINARY LEAST SQUARES
-----------------------------------------
Data set            :     unknown
Weights matrix      :        None
Dependent Variable  :  Ship track                Number of Observations:       27231
Mean dependent var  :      0.1240                Number of Variables   :           2
S.D. dependent var  :      0.1704                Degrees of Freedom    :       27229
R-squared           :      0.0227
Adjusted R-squared  :      0.0227
Sum squared residual:     772.356                F-statistic           :    633.6283
Sigma-square        :       0.028                Prob(F-statistic)     :  3.099e-138
S.E. of regression  :       0.168                Log likelihood        :    9868.366
Sigma-square ML     :       0.028                Akaike info criterion :  -19732.731
S.E of regression ML:      0.1684                Schwarz criterion     :  -19716.307

------------------------------------------------------------------------------------
            Variable     Coefficient       Std.Error     t-Statistic     Probability
------------------------------------------------------------------------------------
            CONSTANT       0.0234761       0.0041228       5.6942356       0.0000000
        Original NO2    6965.7221079     276.7252891      25.1719752       0.0000000
------------------------------------------------------------------------------------

REGRESSION DIAGNOSTICS
MULTICOLLINEARITY CONDITION NUMBER            7.953

TEST ON NORMALITY OF ERRORS
TEST                             DF        VALUE           PROB
Jarque-Bera                       2       67802.441           0.0000

DIAGNOSTICS FOR HETEROSKEDASTICITY
RANDOM COEFFICIENTS
TEST                             DF        VALUE           PROB
Breusch-Pagan test                1        2333.547           0.0000
Koenker-Bassett test              1         573.868           0.0000
================================ END OF REPORT =====================================

Plot scatter plot of original NO2 data

In [78]:
## Scatter plot of NO2 data
fig_boxplot = sns.scatterplot(x=NO2[mask], y=SHIP_TRACK[mask], alpha=1, color='#440154', linewidth=0.1, s=10)
min_before_outlier_removal, max_before_outlier_removal = np.nanmin(NO2[mask]), np.nanmax(NO2[mask])
fig_boxplot.set(xlabel='NO$_2$ vertical column (mol m$^{-2}$)',
                ylabel='Normalized ship track count',
                ylim = (-0.02, 1.02),
                xlim = (0.99 * min_before_outlier_removal, 1.01 * max_before_outlier_removal))
plt.ticklabel_format(axis="x", style="sci", scilimits=(0, 0))
plt.show()

Perform clustering NO2 without spatial data preprocessing¶

Perform clustering

In [79]:
## Cluster NO2 data (average over entire period)
clustered_no2_vcd = np.nanmean(np.multiply(copy.deepcopy(no2_vcd)[:, :, :], ocean_mask), axis=0)
X = clustered_no2_vcd[~np.isnan(clustered_no2_vcd)].reshape(-1, 1)

## Calculate distortions per k
distortions = []
K = range(1, 16) # grid search of k from 1 to 15
for k in K:
    kmeanModel = KMeans(init='k-means++', n_clusters=k, random_state=1).fit(X)
    kmeanModel.fit(X)
    distortions.append(sum(np.min(cdist(X, kmeanModel.cluster_centers_, 'euclidean'), axis=1)) / X.shape[0])
plt.plot(K, distortions, linewidth=3, color='#440154')
plt.grid(True)
plt.xlabel('Number of clusters', color='black')
plt.ylabel('Distortion score', color='black')
plt.show()

## Find the optimal number of clusters
kn = KneeLocator(K, distortions, curve='convex', direction='decreasing')
optimal_k = kn.knee
print("Optimal number of clusters: ", optimal_k)

## Run k-means clustering with optimal number of clusters
kmeans1 = KMeans(init='k-means++', n_clusters=optimal_k, random_state=1).fit(X)
idx = np.argsort(kmeans1.cluster_centers_.sum(axis = 1))
lut = np.zeros_like(idx)
lut[idx] = np.arange(optimal_k)
kmeans1_lab = lut[kmeans1.labels_] + 1
temp_1 = np.zeros([len(lats), len(lons)], dtype=int)
ocean_idx = np.where(ocean_mask.flatten() == 1)
# land_idx = np.where(ocean_mask.flatten() != 1)
np.put(temp_1, ocean_idx, kmeans1_lab)
no2_kmeans_label = np.multiply(temp_1, ocean_mask)
print("K-means label of NO2:\n", no2_kmeans_label)

del clustered_no2_vcd, X, K, k, kn, kmeans1, idx, lut, kmeans1_lab, temp_1
Optimal number of clusters:  4
K-means label of NO2:
 [[ 2.  2.  2. ... nan nan nan]
 [ 2.  2.  2. ... nan nan nan]
 [ 2.  2.  2. ... nan nan nan]
 ...
 [nan nan nan ...  2.  2.  2.]
 [nan nan nan ...  1.  1.  1.]
 [nan nan nan ...  1.  1.  1.]]

Plot clustered data

In [80]:
## Plot base map
ax = plt.axes(projection=ccrs.PlateCarree())
ax.add_feature(cart.feature.LAND, zorder=1, facecolor='#edc9af', alpha=1)
ax.add_feature(cart.feature.COASTLINE, linewidth=1, edgecolor='black')
ax.gridlines(draw_labels=True, linestyle='--')

## Plot clustered data
no2_clim_min, no2_clim_max = np.nanmin(no2_kmeans_label), np.nanmax(no2_kmeans_label)
plt.imshow(no2_kmeans_label, cmap = matplotlib.cm.get_cmap("viridis", optimal_k),
           extent=[extent[0], extent[1], extent[2], extent[3]],
           vmin = np.nanmin(no2_kmeans_label), vmax = np.nanmax(no2_kmeans_label))
cbar = plt.colorbar(fraction=0.0426, pad=0.175, label='Cluster label')
labels = np.linspace(no2_clim_min, no2_clim_max, optimal_k, dtype='int32')
loc = 0.75 * labels + 0.625
cbar.set_ticks(loc)
cbar.set_ticklabels(labels)
cbar.outline.set_edgecolor(None)
plt.show()

Perform statistical analysis

In [81]:
## Prepare data
NO2 = copy.deepcopy(no2_kmeans_label).reshape(-1, 1)
SHIP_TRACK = copy.deepcopy(ship_freq).reshape(-1, 1)
mask = ~np.isnan(NO2) & ~np.isnan(SHIP_TRACK)
X = NO2[mask].reshape(-1, 1)
y = SHIP_TRACK[mask]

## Pearson correlation
print('Spatial correlation between NO2 column level & ship track counts:', stats.pearsonr(NO2[mask], SHIP_TRACK[mask]))

# Fit OLS model
m2 = spreg.OLS(
    # Dependent variable
    y,
    # Independent variables
    X,
    # Dependent variable name
    name_y='Ship track', 
    # Independent variable name
    name_x=['NO2 cluster w/o ConvGi*']
)
print(m2.summary)
Spatial correlation between NO2 column level & ship track counts: PearsonRResult(statistic=0.25491521162444686, pvalue=0.0)
REGRESSION
----------
SUMMARY OF OUTPUT: ORDINARY LEAST SQUARES
-----------------------------------------
Data set            :     unknown
Weights matrix      :        None
Dependent Variable  :  Ship track                Number of Observations:       27231
Mean dependent var  :      0.1240                Number of Variables   :           2
S.D. dependent var  :      0.1704                Degrees of Freedom    :       27229
R-squared           :      0.0650
Adjusted R-squared  :      0.0649
Sum squared residual:     738.972                F-statistic           :   1892.3572
Sigma-square        :       0.027                Prob(F-statistic)     :           0
S.E. of regression  :       0.165                Log likelihood        :   10469.974
Sigma-square ML     :       0.027                Akaike info criterion :  -20935.947
S.E of regression ML:      0.1647                Schwarz criterion     :  -20919.523

------------------------------------------------------------------------------------
            Variable     Coefficient       Std.Error     t-Statistic     Probability
------------------------------------------------------------------------------------
            CONSTANT       0.0212482       0.0025649       8.2843396       0.0000000
NO2 cluster w/o ConvGi*       0.0589597       0.0013554      43.5012321       0.0000000
------------------------------------------------------------------------------------

REGRESSION DIAGNOSTICS
MULTICOLLINEARITY CONDITION NUMBER            4.936

TEST ON NORMALITY OF ERRORS
TEST                             DF        VALUE           PROB
Jarque-Bera                       2       65686.622           0.0000

DIAGNOSTICS FOR HETEROSKEDASTICITY
RANDOM COEFFICIENTS
TEST                             DF        VALUE           PROB
Breusch-Pagan test                1        4110.224           0.0000
Koenker-Bassett test              1        1017.322           0.0000
================================ END OF REPORT =====================================

Calculate statistics per cluster (mean & std)

In [82]:
## Store the cluster labels as a mask
dict_cluster = {}
for i in range(optimal_k):
    tmp = copy.deepcopy(no2_kmeans_label)
    tmp = tmp / (i + 1)
    tmp[tmp != 1] = np.nan
    dict_cluster[i + 1] = tmp

## Copy data
no2_vcd_copied = copy.deepcopy(no2_vcd)
ship_freq_copied = copy.deepcopy(ship_freq)

## Define rounding function to designated significant numbers
def round_sf(number, significant):
    return round(number, significant - int(np.floor(np.log10(abs(number)))) - 1)

## Calculate mean and std per cluster
dict_stat_per_cluster = {}
for i in range(1, optimal_k+1):
    dict_stat_per_cluster[i] = round_sf(np.nanmean(np.multiply(no2_vcd_copied, dict_cluster[i])), 4), round_sf(np.nanstd(np.multiply(no2_vcd_copied, dict_cluster[i])), 4)
print('Statistics of no2 measurement per cluster (mean, std):\n', dict_stat_per_cluster)

## Calculate shipping frequency per cluster
dict_ship_freq_per_cluster = {}
for i in range(1, optimal_k+1):
    dict_ship_freq_per_cluster[i] = round_sf(np.nanmean(np.multiply(ship_freq_copied, dict_cluster[i])), 4), round_sf(np.nanstd(np.multiply(ship_freq_copied, dict_cluster[i])), 4)
print('Statistics of shipping frequency per cluster (mean, std):\n', dict_ship_freq_per_cluster)

del i, tmp, no2_vcd_copied, ship_freq_copied
Statistics of no2 measurement per cluster (mean, std):
 {1: (1.19e-05, 5.747e-06), 2: (1.476e-05, 7.22e-06), 3: (1.924e-05, 1.3e-05), 4: (3.291e-05, 2.933e-05)}
Statistics of shipping frequency per cluster (mean, std):
 {1: (0.04851, 0.05815), 2: (0.1897, 0.199), 3: (0.1548, 0.2107), 4: (0.03216, 0.01366)}

Plot box plot of clustered NO2 data

In [83]:
## Prepare clustered data
NO2_cluster = no2_kmeans_label.reshape(-1, 1)
SHIP_TRACK = copy.deepcopy(ship_freq).reshape(-1, 1)
mask = ~np.isnan(NO2_cluster) & ~np.isnan(SHIP_TRACK)
color_palette_viridis = [ "#440154", "#31688e", "#35b779", "#fde725"]

## Box plot of NO2 clustered data
fig_boxplot = sns.boxplot(x='NO2_cluster', y='Ship', data={'NO2_cluster': NO2_cluster[mask], 'Ship': SHIP_TRACK[mask]}, palette=color_palette_viridis,                          
                          showfliers=False, showmeans=True, meanprops={"markerfacecolor":"red", "markeredgecolor":"red"})
fig_boxplot.set(xlabel='Original NO$_2$ kmeans cluster label', ylabel='Normalized ship track count')
plt.show()

Perform clustering on NO2 using $ConvG_i^*$¶

Calculate $ConvG_i^*$

In [84]:
# Calculate ConvGistar
no2_ConvGistar = ConvGistar(np.multiply(copy.deepcopy(no2_vcd), ocean_mask))

Plot $ConvG_i^*$

In [85]:
## Plot base map
ax = plt.axes(projection=ccrs.PlateCarree())
ax.add_feature(cart.feature.LAND, zorder=1, facecolor='#edc9af', alpha=1)
ax.add_feature(cart.feature.COASTLINE, linewidth=1, edgecolor='black')
ax.gridlines(draw_labels=True, linestyle='--')

## Plot clustered data
plt.imshow(copy.deepcopy(no2_ConvGistar)[888, :, :], # May 11th 2021
           cmap='viridis', extent=[extent[0], extent[1], extent[2], extent[3]], vmin=-.5, vmax=.5)
plt.colorbar(cmap='viridis',
             fraction=0.0426, pad=0.175, extend='both',
             label='$ConvG_i^*$ of NO$_{2}$ vertical column').outline.set_edgecolor(None)
plt.show()

Exclude near-coast coordinates¶

Build mask to filter out land & near-coast coordinates

In [86]:
# Build ocean_mask_far_coast
x, y = np.array(lons), np.array(lats)
xx, yy = np.meshgrid(x, y, sparse=True)
land = prep(unary_union(list(cartopy.io.shapereader.Reader(cartopy.io.shapereader.natural_earth(resolution='10m', category='physical', name='land')).geometries())))
ocean_mask_far_coast = np.empty([len(lats), len(lons)])
for i in range(len(lons)):
    for j in range(len(lats)):
        ocean_mask_far_coast[j, i] = not is_land(land, x[i], y[j])
land_mask = -(ocean_mask_far_coast) + 1

rad = 20 # hyperparameter of kernel radius
kernel_xx, kernel_yy = np.meshgrid(np.arange(-rad, rad + 1, 1), np.arange(-rad, rad + 1, 1))
kernel_near_coast = np.square(kernel_xx) + np.square(kernel_yy) <= np.square(rad)
ocean_mask_far_coast = -(convolve2d(land_mask.astype(int), kernel_near_coast.astype(int), mode='same').astype(bool).astype(float)) + 1
ocean_mask_far_coast[ocean_mask_far_coast == 0] = np.nan
no2_vcd_far_coast = np.multiply(copy.deepcopy(no2_vcd), ocean_mask_far_coast)

del rad

Plot near-coast filtered ocean coordinates

In [87]:
## Plot base map
ax = plt.axes(projection=ccrs.PlateCarree())
ax.add_feature(cart.feature.LAND, zorder=1, facecolor='#edc9af', alpha=1)
ax.add_feature(cart.feature.COASTLINE, linewidth=1, edgecolor='black')
ax.gridlines(draw_labels=True, linestyle='--')

## Plot ocean-coordinates only
plt.imshow(ocean_mask_far_coast, extent=[extent[0], extent[1], extent[2], extent[3]], cmap='viridis')
plt.show()

Plot near-coast filtered true shipping frequency

In [88]:
## Plot base map
ax = plt.axes(projection=ccrs.PlateCarree())
ax.add_feature(cart.feature.LAND, zorder=1, facecolor='#edc9af', alpha=1)
ax.add_feature(cart.feature.COASTLINE, linewidth=1, edgecolor='black')
ax.gridlines(draw_labels=True, linestyle='--')

## Plot true shipping frequency
img = np.multiply(ship_freq[:, :], ocean_mask_far_coast)
plt.imshow(img, extent=[extent[0], extent[1], extent[2], extent[3]], cmap='viridis')
plt.colorbar(matplotlib.cm.ScalarMappable(cmap='viridis'),
             fraction=0.0426, pad = 0.175, extend='neither', ax=plt.gca(),
             ticks=np.linspace(np.floor(np.nanmin(img)*100)/100, np.ceil(np.nanmax(img)*100)/100, 11),
             label='Normalized ship track count').outline.set_edgecolor(None)
plt.show()

del img

Plot near-coast filtered daily measurement data

In [89]:
## Plot base map
ax = plt.axes(projection=ccrs.PlateCarree())
ax.add_feature(cart.feature.LAND, zorder=1, facecolor='#edc9af', alpha=1)
ax.add_feature(cart.feature.COASTLINE, linewidth=1, edgecolor='black')
ax.gridlines(draw_labels=True, linestyle='--')

## Plot daily measurement data
plt.imshow(copy.deepcopy(no2_vcd_far_coast)[888, :, :], # May 11th 2021
           cmap='viridis', extent=[extent[0], extent[1], extent[2], extent[3]], vmin=0, vmax=0.00005)
plt.colorbar(cmap='viridis', fraction=0.0426, pad=0.175, extend='both',
             label='NO$_{2}$ vertical column (mol m$^{-2}$)').outline.set_edgecolor(None)
plt.show()

Inspect how near-coast removal effectively removes outliers (compared to original NO2 data scatter plot)

In [90]:
## Prepare data
NO2 = np.nanmean(copy.deepcopy(no2_vcd_far_coast)[:, :, :], axis=0).reshape(-1, 1)
SHIP_TRACK = copy.deepcopy(ship_freq).reshape(-1, 1)
mask = ~np.isnan(NO2) & ~np.isnan(SHIP_TRACK)

## Scatter plot of NO2 data
fig_boxplot = sns.scatterplot(x=NO2[mask], y=SHIP_TRACK[mask], alpha=1, color='#440154', linewidth=0.1, s=10)
fig_boxplot.set(xlabel='NO$_2$ vertical column (mol m$^{-2}$)',
                ylabel='Normalized ship track count',
                ylim = (-0.02, 1.02),
                xlim = (0.99 * min_before_outlier_removal, 1.01 * max_before_outlier_removal))
plt.ticklabel_format(axis="x", style="sci", scilimits=(0, 0))
plt.show()

Perform clustering on near-coast excluded NO2 using $ConvG_i^*$¶

Calculate $ConvG_i^*$

In [91]:
# Calculate ConvGistar
no2_vcd_ConvGistar = ConvGistar(copy.deepcopy(no2_vcd_far_coast))

Plot $ConvG_i^*$

In [92]:
## Plot base map
ax = plt.axes(projection=ccrs.PlateCarree())
ax.add_feature(cart.feature.LAND, zorder=1, facecolor='#edc9af', alpha=1)
ax.add_feature(cart.feature.COASTLINE, linewidth=1, edgecolor='black')
ax.gridlines(draw_labels=True, linestyle='--')

## Plot daily measurement data
plt.imshow(copy.deepcopy(no2_vcd_ConvGistar)[888, :, :], # May 11th 2021
           cmap='viridis', extent=[extent[0], extent[1], extent[2], extent[3]], vmin=-.5, vmax=.5)
plt.colorbar(cmap='viridis', fraction=0.0426, pad=0.175, extend='both',
             label='$ConvG_i^*$ of NO$_{2}$ vertical column').outline.set_edgecolor(None)
plt.show()

Perform clustering on $ConvG_i^*$

In [93]:
# NO$_{2}$ values excluding NaN
clustered_no2_vcd = np.nanmean(no2_vcd_ConvGistar, axis=0)
X = clustered_no2_vcd[~np.isnan(clustered_no2_vcd)].reshape(-1, 1)

# Run k means and store its label
kmeans_fit = KMeans(init='k-means++', n_clusters=optimal_k, random_state=1).fit(X)
idx = np.argsort(kmeans_fit.cluster_centers_.sum(axis=1))
lut = np.zeros_like(idx)
lut[idx] = np.arange(optimal_k)
kmeans_fit_lab = lut[kmeans_fit.labels_] + 1
print("K-means label of NO2:\n", kmeans_fit_lab)

# Mask k means label of land coordinates
np_zeros = np.zeros([len(lats), len(lons)], dtype=int)
ocean_idx = np.where(ocean_mask_far_coast.flatten() == 1)
np.put(np_zeros, ocean_idx, kmeans_fit_lab)
no2_ConvGistar_kmeans_label = np.multiply(np_zeros, ocean_mask_far_coast)

del clustered_no2_vcd, X, kmeans_fit, idx, lut, kmeans_fit_lab, np_zeros, ocean_idx
K-means label of NO2:
 [3 3 3 ... 2 2 2]

Plot clustered $ConvG_i^*$ data

In [94]:
## Plot base map
ax = plt.axes(projection=ccrs.PlateCarree())
ax.add_feature(cart.feature.LAND, zorder=1, facecolor='#edc9af', alpha=1)
ax.add_feature(cart.feature.COASTLINE, linewidth=1, edgecolor='black')
ax.gridlines(draw_labels=True, linestyle='--')

## Plot clustered data
no2_clim_min, no2_clim_max = np.nanmin(no2_ConvGistar_kmeans_label), np.nanmax(no2_ConvGistar_kmeans_label)
plt.imshow(no2_ConvGistar_kmeans_label, cmap = matplotlib.cm.get_cmap("viridis", optimal_k),
           extent=[extent[0], extent[1], extent[2], extent[3]],
           vmin = np.nanmin(no2_ConvGistar_kmeans_label), vmax = np.nanmax(no2_ConvGistar_kmeans_label))
cbar = plt.colorbar(fraction=0.0426, pad=0.175, label='Cluster label')
labels = np.linspace(no2_clim_min, no2_clim_max, optimal_k, dtype='int32')
loc = 0.75 * labels + 0.625
cbar.set_ticks(loc)
cbar.set_ticklabels(labels)
cbar.outline.set_edgecolor(None)
plt.show()

Perform statistical analysis

In [95]:
## Prepare data
NO2_ConvGistar_cluster = copy.deepcopy(no2_ConvGistar_kmeans_label).reshape(-1,1)
SHIP_TRACK = ship_freq.reshape(-1,1)
mask = ~np.isnan(NO2_ConvGistar_cluster) & ~np.isnan(SHIP_TRACK)
X = NO2[mask].reshape(-1, 1)
y = SHIP_TRACK[mask]

## Pearson correlation
print('Spatial correlation between NO2 column level & ship track counts:', stats.pearsonr(NO2_ConvGistar_cluster[mask], SHIP_TRACK[mask]))

# Fit OLS model
m3 = spreg.OLS(
    # Dependent variable
    y,
    # Independent variables
    X,
    # Dependent variable name
    name_y='Ship track', 
    # Independent variable name
    name_x=['NO2 cluster with ConvGi*']
)
print(m3.summary)
Spatial correlation between NO2 column level & ship track counts: PearsonRResult(statistic=0.6235099013736775, pvalue=0.0)
REGRESSION
----------
SUMMARY OF OUTPUT: ORDINARY LEAST SQUARES
-----------------------------------------
Data set            :     unknown
Weights matrix      :        None
Dependent Variable  :  Ship track                Number of Observations:       11162
Mean dependent var  :      0.2076                Number of Variables   :           2
S.D. dependent var  :      0.2251                Degrees of Freedom    :       11160
R-squared           :      0.3919
Adjusted R-squared  :      0.3919
Sum squared residual:     343.730                F-statistic           :   7193.3962
Sigma-square        :       0.031                Prob(F-statistic)     :           0
S.E. of regression  :       0.175                Log likelihood        :    3585.994
Sigma-square ML     :       0.031                Akaike info criterion :   -7167.988
S.E of regression ML:      0.1755                Schwarz criterion     :   -7153.347

------------------------------------------------------------------------------------
            Variable     Coefficient       Std.Error     t-Statistic     Probability
------------------------------------------------------------------------------------
            CONSTANT      -0.9741541       0.0140322     -69.4226693       0.0000000
NO2 cluster with ConvGi*    82967.5574506     978.2307568      84.8138917       0.0000000
------------------------------------------------------------------------------------

REGRESSION DIAGNOSTICS
MULTICOLLINEARITY CONDITION NUMBER           16.835

TEST ON NORMALITY OF ERRORS
TEST                             DF        VALUE           PROB
Jarque-Bera                       2        3122.068           0.0000

DIAGNOSTICS FOR HETEROSKEDASTICITY
RANDOM COEFFICIENTS
TEST                             DF        VALUE           PROB
Breusch-Pagan test                1        4648.023           0.0000
Koenker-Bassett test              1        2375.033           0.0000
================================ END OF REPORT =====================================

Calculate statistics per cluster (mean & std)

In [96]:
## Store the cluster labels as a mask
dict_ConvGistar_cluster = {}
for i in range(optimal_k):
    tmp = copy.deepcopy(no2_ConvGistar_kmeans_label)
    tmp = tmp / (i + 1)
    tmp[tmp != 1] = np.nan
    dict_ConvGistar_cluster[i + 1] = tmp

## Copy data
no2_vcd_copied = copy.deepcopy(no2_vcd)
ship_freq_copied = copy.deepcopy(ship_freq)

## Define rounding function to designated significant numbers
def round_sf(number, significant):
    return round(number, significant - int(np.floor(np.log10(abs(number)))) - 1)

## Calculate mean and std per cluster
dict_stat_per_cluster = {}
for i in range(1, optimal_k+1):
    dict_stat_per_cluster[i] = round_sf(np.nanmean(np.multiply(no2_vcd_copied, dict_ConvGistar_cluster[i])), 4), round_sf(np.nanstd(np.multiply(no2_vcd_copied, dict_ConvGistar_cluster[i])), 4)
print('Statistics of no2 measurement per cluster (mean, std):\n', dict_stat_per_cluster)

## Calculate shipping frequency per cluster
dict_ship_freq_per_cluster = {}
for i in range(1, optimal_k+1):
    dict_ship_freq_per_cluster[i] = round_sf(np.nanmean(np.multiply(ship_freq_copied, dict_ConvGistar_cluster[i])), 4), round_sf(np.nanstd(np.multiply(ship_freq_copied, dict_ConvGistar_cluster[i])), 4)
print('Statistics of shipping frequency per cluster (mean, std):\n', dict_ship_freq_per_cluster)

del i, tmp, no2_vcd_copied, ship_freq_copied
Statistics of no2 measurement per cluster (mean, std):
 {1: (1.201e-05, 5.902e-06), 2: (1.343e-05, 6.607e-06), 3: (1.485e-05, 7.577e-06), 4: (1.681e-05, 9.197e-06)}
Statistics of shipping frequency per cluster (mean, std):
 {1: (0.05392, 0.06413), 2: (0.07848, 0.08036), 3: (0.2899, 0.2203), 4: (0.435, 0.2433)}

Plot box plot of clustered $ConvG_i^*$ data

In [97]:
## Prepare clustered data
NO2_cluster = no2_ConvGistar_kmeans_label.reshape(-1, 1)
SHIP_TRACK = copy.deepcopy(ship_freq).reshape(-1, 1)
mask = ~np.isnan(NO2_cluster) & ~np.isnan(SHIP_TRACK)

## Box plot of NO2 clustered data
fig_boxplot = sns.boxplot(x='NO2_cluster', y='Ship', data={'NO2_cluster': NO2_cluster[mask], 'Ship': SHIP_TRACK[mask]}, palette=color_palette_viridis,
                          showfliers=False, showmeans=True, meanprops={"markerfacecolor":"red", "markeredgecolor":"red"})
fig_boxplot.set(xlabel='NO$_2$ $ConvG_i^*$ kmeans cluster label', ylabel='Normalized ship track count')
plt.show()

Perform temporal analysis¶

For temporal analysis, we focus on the comparison between shipping route (cluster with the highest label) and background (cluster with the lowest label).

Plot shipping route (cluster with the highest label) from clustered $ConvG_i^*$ data

In [98]:
## Plot base map
ax = plt.axes(projection=ccrs.PlateCarree())
ax.add_feature(cart.feature.LAND, zorder=1, facecolor='#edc9af', alpha=1)
ax.add_feature(cart.feature.COASTLINE, linewidth=1, edgecolor='black')
ax.gridlines(draw_labels=True, linestyle='--')

## Build shipping-route mask
no2_ConvGistar_shipping = copy.deepcopy(no2_ConvGistar_kmeans_label)
no2_ConvGistar_shipping[no2_ConvGistar_shipping != optimal_k] = np.nan
no2_ConvGistar_shipping = no2_ConvGistar_shipping / optimal_k

## Plot clustered data masked by shipping-route mask
plt.imshow(no2_ConvGistar_shipping, cmap = matplotlib.cm.get_cmap("viridis", optimal_k),
           extent=[extent[0], extent[1], extent[2], extent[3]])
plt.title('Shipping-route cluster')
plt.show()

Plot background (cluster with the lowest label) from clustered $ConvG_i^*$ data

In [99]:
## Plot base map
ax = plt.axes(projection=ccrs.PlateCarree())
ax.add_feature(cart.feature.LAND, zorder=1, facecolor='#edc9af', alpha=1)
ax.add_feature(cart.feature.COASTLINE, linewidth=1, edgecolor='black')
ax.gridlines(draw_labels=True, linestyle='--')

## Build background mask
no2_ConvGistar_background = copy.deepcopy(no2_ConvGistar_kmeans_label)
no2_ConvGistar_background[no2_ConvGistar_background != 1] = np.nan
# no2_ConvGistar_background = no2_ConvGistar_shipping / 1

## Plot clustered data masked by background mask
plt.imshow(no2_ConvGistar_background, cmap = matplotlib.cm.get_cmap("viridis", optimal_k),
           extent=[extent[0], extent[1], extent[2], extent[3]])
plt.title('Background cluster')
plt.show()

Generate monthly timestamp

In [100]:
## Generate monthly timestamp
## Monthly timestamp: first-day indices from Dec. 5th 2018 to Nov. 10th 2021 (total 1,072 days)
month_index = [0,
               27, 58, 86, 117, 147, 178, 208, 239, 270, 300, 331, 361,
               392, 423, 452, 483, 513, 544, 574, 605, 636, 666, 697, 727,
               758, 789, 817, 848, 878, 909, 939, 970, 1001, 1031, 1062, 1072]
month_name = ['Dec. 2018',
              'Jan. 2019', 'Feb. 2019', 'Mar. 2019', 'Apr. 2019', 'May. 2019', 'Jun. 2019', 'Jul. 2019', 'Aug. 2019', 'Sep. 2019', 'Oct. 2019', 'Nov. 2019', 'Dec. 2019',
              'Jan. 2020', 'Feb. 2020', 'Mar. 2020', 'Apr. 2020', 'May. 2020', 'Jun. 2020', 'Jul. 2020', 'Aug. 2020', 'Sep. 2020', 'Oct. 2020', 'Nov. 2020', 'Dec. 2020',
              'Jan. 2021', 'Feb. 2021', 'Mar. 2021', 'Apr. 2021', 'May. 2021', 'Jun. 2021', 'Jul. 2021', 'Aug. 2021', 'Sep. 2021', 'Oct. 2021', 'Nov. 2021']
month = {}
for i in range(len(month_index) - 1):
    month["{0}".format(i)] = range(month_index[i], month_index[i + 1])

Calculate monthly statistics per clusters (e.g. shipping route vs. background)

In [101]:
## Calculate monthly mean & std
no2_monthly_mean_shipping = {}
no2_monthly_mean_background = {}
no2_monthly_std_shipping = {}
no2_monthly_std_background = {}
for i in range(len(month_index) - 1):
    no2_monthly_mean_shipping["{0}".format(i)] = np.nanmean(np.nanmean(np.multiply(copy.deepcopy(no2_vcd)[month[str(i)], :, :], no2_ConvGistar_shipping), axis=0))
    no2_monthly_mean_background["{0}".format(i)] = np.nanmean(np.nanmean(np.multiply(copy.deepcopy(no2_vcd)[month[str(i)], :, :], no2_ConvGistar_background), axis=0))
    no2_monthly_std_shipping["{0}".format(i)] = np.nanstd(np.nanmean(np.multiply(copy.deepcopy(no2_vcd)[month[str(i)], :, :], no2_ConvGistar_shipping), axis=0))
    no2_monthly_std_background["{0}".format(i)] = np.nanstd(np.nanmean(np.multiply(copy.deepcopy(no2_vcd)[month[str(i)], :, :], no2_ConvGistar_background), axis=0))

Plot the trend of monthly statistics in shipping route cluster

In [102]:
## Prepare data
x = np.array(["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"])
y1 = [*no2_monthly_mean_shipping.values()][1:13]
y2 = [*no2_monthly_mean_shipping.values()][13:25]
y3 = [*no2_monthly_mean_shipping.values()][25:]
y1err = [*no2_monthly_std_shipping.values()][1:13]
y2err = [*no2_monthly_std_shipping.values()][13:25]
y3err = [*no2_monthly_std_shipping.values()][25:]

## Plot trend time series
plt.figure(figsize=(14, 7))
plt.plot(x, y1, label='Monthly mean of shipping route 2019', color='#22a884')
plt.errorbar(x, y1, yerr=y1err, fmt='o', capsize=5, alpha=0.5, color='#22a884')
plt.plot(x, y2, label='Monthly mean of shipping route 2020', color='#7ad151')
plt.errorbar(x, y2, yerr=y1err, fmt='o', capsize=5, alpha=0.5, color='#7ad151')
plt.plot(range(len(y3)), y3, label='Monthly mean of shipping route 2021', color='#fde725')
plt.errorbar(range(len(y3)), y3, yerr=y3err, fmt='o', capsize=5, alpha=0.5, color='#fde725')
plt.ticklabel_format(axis='y', style='sci', scilimits=(-5, -5))
plt.xticks(rotation = 90)
plt.ylabel('NO$_2$ vertical column (mol m$^{-2}$)')
plt.legend()
plt.tight_layout()
plt.show()

Plot the trend of monthly statistics in background cluster

In [103]:
## Prepare data
x = np.array(["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"])
y1 = [*no2_monthly_mean_background.values()][1:13]
y2 = [*no2_monthly_mean_background.values()][13:25]
y3 = [*no2_monthly_mean_background.values()][25:]
y1err = [*no2_monthly_std_background.values()][1:13]
y2err = [*no2_monthly_std_background.values()][13:25]
y3err = [*no2_monthly_std_background.values()][25:]

## Plot trend time series
plt.figure(figsize=(14, 7))
plt.plot(x, y1, label='Monthly mean of background 2019', color='#440154')
plt.errorbar(x, y1, yerr=y1err, fmt='o', capsize=5, alpha=0.5, color='#440154')
plt.plot(x, y2, label='Monthly mean of background 2020', color='#21918c')
plt.errorbar(x, y2, yerr=y1err, fmt='o', capsize=5, alpha=0.5, color='#21918c')
plt.plot(range(len(y3)), y3, label='Monthly mean of background 2021', color='#5ec962')
plt.errorbar(range(len(y3)), y3, yerr=y3err, fmt='o', capsize=5, alpha=0.5, color='#5ec962')
plt.ticklabel_format(axis='y', style='sci', scilimits=(-5, -5))
plt.xticks(rotation = 90)
plt.ylabel('NO$_2$ vertical column (mol m$^{-2}$)')
plt.legend()
plt.tight_layout()
plt.show()

Plot the trend of monthly statistics in shipping route cluster & background cluster

In [104]:
## Prepare data
x = np.array(["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"])
y1_ship = [*no2_monthly_mean_shipping.values()][1:13]
y2_ship = [*no2_monthly_mean_shipping.values()][13:25]
y3_ship = [*no2_monthly_mean_shipping.values()][25:]
y1_back = [*no2_monthly_mean_background.values()][1:13]
y2_back = [*no2_monthly_mean_background.values()][13:25]
y3_back = [*no2_monthly_mean_background.values()][25:]
y1err_ship = [*no2_monthly_std_shipping.values()][1:13]
y2err_ship = [*no2_monthly_std_shipping.values()][13:25]
y3err_ship = [*no2_monthly_std_shipping.values()][25:]
y1err_back = [*no2_monthly_std_background.values()][1:13]
y2err_back = [*no2_monthly_std_background.values()][13:25]
y3err_back = [*no2_monthly_std_background.values()][25:]

## Plot trend time series
plt.figure(figsize=(14, 7))
plt.plot(x, y1_ship, label='Monthly NO$_2$ mean 2019 - ship', color='#22a884')
plt.errorbar(x, y1_ship, yerr=y1err_ship, fmt='o', capsize=5, alpha=0.5, color='#22a884')
plt.plot(x, y2_ship, label='Monthly NO$_2$ mean 2020 - ship', color='#7ad151')
plt.errorbar(x, y2_ship, yerr=y2err_ship, fmt='o', capsize=5, alpha=0.5, color='#7ad151')
plt.plot(range(len(y3_ship)), y3_ship, label='Monthly NO$_2$ mean 2021 - ship', color='#fde725')
plt.errorbar(range(len(y3_ship)), y3_ship, yerr=y3err_ship, fmt='o', capsize=5, alpha=0.5, color='#fde725')
plt.plot(x, y1_back, label='Monthly NO$_2$ mean 2019 - back', color='#440154')
plt.errorbar(x, y1_back, yerr=y1err_back, fmt='o', capsize=5, alpha=0.5, color='#440154')
plt.plot(x, y2_back, label='Monthly NO$_2$ mean 2020 - back', color='#414487')
plt.errorbar(x, y2_back, yerr=y2err_back, fmt='o', capsize=5, alpha=0.5, color='#414487')
plt.plot(range(len(y3_back)), y3_back, label='Monthly NO$_2$ mean 2021 - back', color='#2a788e')
plt.errorbar(range(len(y3_back)), y3_back, yerr=y3err_back, fmt='o', capsize=5, alpha=0.5, color='#2a788e')
plt.ticklabel_format(axis='y', style='sci', scilimits=(-5, -5))
plt.xticks(rotation = 90)
plt.ylabel("NO$_2$ vertical column (mol m$^{-2}$)")
plt.legend()
plt.tight_layout()
plt.show()

Plot the trend of monthly statistics in shipping route cluster, compared with the global container throughput index

In [105]:
## Prepare data
x = np.array(month_name)
y_ship = [*no2_monthly_mean_shipping.values()]
y_back = [*no2_monthly_mean_background.values()]
yerr_ship = [*no2_monthly_std_shipping.values()]
yerr_back = [*no2_monthly_std_background.values()]
global_throughput_index = pd.read_excel('https://www.isl.org/public/containerumschlag-index/2023-04/containerumschlag-index_230531.xlsx')
start = global_throughput_index.index[global_throughput_index.iloc[:, 0] == datetime.datetime(2018, 12, 1, 0, 0)].item()
end = global_throughput_index.index[global_throughput_index.iloc[:, 0] == datetime.datetime(2021, 11, 1, 0, 0)].item()
global_throughput_original = global_throughput_index.iloc[start:(end + 1), 1].tolist()

## Plot trend time series
fig, ax1 = plt.subplots(figsize=(14, 7))
plt.xticks(rotation = 90)
plt.ylabel("NO$_2$ vertical column (mol m$^{-2}$)")
ax2 = ax1.twinx()
ax1.plot(x, y_ship, label='Monthly mean NO$_2$ vertical column - shipping route', color='#440154')
ax1.errorbar(x, y_ship, yerr=yerr_ship, fmt='o', capsize=5, alpha=0.5, color='#440154')
ax1.ticklabel_format(axis='y', style='sci', scilimits=(-5, -5))
ax2.plot(x, global_throughput_original, label='RWI/ISL container throughput index - original', color='#21918c')
ax2.grid(False)
ax2.set_ylim([90, 140])
plt.ylabel("Container throughput index (index$_{2015}$=100)")
c1_patch = mpatches.Patch(color='#440154', label='Monthly mean NO$_2$ vertical column - shipping route')
c2_patch = mpatches.Patch(color='#21918c', label='RWI/ISL container throughput index - original')
plt.legend(handles=[c1_patch, c2_patch], loc='upper right')
plt.tight_layout()
plt.show()

Perform seasonal decomposition on the data

In [106]:
## Perform seasonal decomposition
seasonal_decomp = statsmodels.tsa.seasonal.seasonal_decompose(y_ship, model='additive', period=12)

## Plot seasonality
plt.figure(figsize=(14, 7))
plt.plot(seasonal_decomp.seasonal)
plt.xticks(np.arange(0, 36), pd.Series(month_name), rotation = 90)
plt.legend(['Seasonality of monthly mean NO$_2$ vertical column - shipping route'], loc='upper right')
plt.show()

## Plot deseasonalized NO2 data
plt.figure(figsize=(14, 7))
plt.plot(y_ship - seasonal_decomp.seasonal)
plt.xticks(np.arange(0, 36), pd.Series(month_name), rotation = 90)
plt.legend(['Deseasonalized monthly mean NO$_2$ vertical column - shipping route'], loc='upper right')
plt.show()

Plot the trend of deseasonalized monthly statistics in shipping route cluster, compared with the global container throughput adjusted index

In [107]:
## Prepare data
x = np.array(month_name)
y_ship = [*no2_monthly_mean_shipping.values()]
y_back = [*no2_monthly_mean_background.values()]
yerr_ship = [*no2_monthly_std_shipping.values()]
yerr_back = [*no2_monthly_std_background.values()]
global_throughput_index = pd.read_excel('https://www.isl.org/public/containerumschlag-index/2023-04/containerumschlag-index_230531.xlsx')
start = global_throughput_index.index[global_throughput_index.iloc[:, 0] == datetime.datetime(2018, 12, 1, 0, 0)].item()
end = global_throughput_index.index[global_throughput_index.iloc[:, 0] == datetime.datetime(2021, 11, 1, 0, 0)].item()
global_throughput_adjusted = global_throughput_index.iloc[start:(end + 1), 2].tolist()

## Plot trend time series
fig, ax1 = plt.subplots(figsize=(14, 7))
plt.xticks(rotation = 90)
plt.ylabel("NO$_2$ vertical column (mol m$^{-2}$)")
ax2 = ax1.twinx()
ax1.plot(x, y_ship - seasonal_decomp.seasonal, label='Deseasonalized monthly mean NO$_2$ vertical column - shipping route', color='#440154')
ax1.errorbar(x, y_ship - seasonal_decomp.seasonal, yerr=yerr_ship, fmt='o', capsize=5, alpha=0.5, color='#440154')
ax1.ticklabel_format(axis='y', style='sci', scilimits=(-4, -4))
ax2.plot(x, global_throughput_adjusted, label='RWI/ISL container throughput index - adjusted', color='#21918c')
ax2.grid(False)
ax2.set_ylim([90, 140])
plt.ylabel("Container throughput index (index$_{2015}$=100)")
c1_patch = mpatches.Patch(color='#440154', label='Deseasonalized monthly mean NO$_2$ vertical column - shipping route')
c2_patch = mpatches.Patch(color='#21918c', label='RWI/ISL container throughput index - adjusted')
plt.legend(handles=[c1_patch, c2_patch], loc='upper right')
plt.tight_layout()
plt.show()

Perform statistical analysis

In [108]:
## Monthly mean NO2 on shipping route vs. global throughput index
## Pearson correlation
print('Temporal Pearson correlation between shipping & throughput index (original):', stats.pearsonr(y_ship, global_throughput_original))
print('Temporal Pearson correlation between deseasonalized shipping & throughput index (adjusted):', stats.pearsonr(y_ship - seasonal_decomp.seasonal, global_throughput_adjusted))
Temporal Pearson correlation between shipping & throughput index (original): PearsonRResult(statistic=0.17019354794446567, pvalue=0.3210086750037733)
Temporal Pearson correlation between deseasonalized shipping & throughput index (adjusted): PearsonRResult(statistic=0.6750764374545704, pvalue=6.2965013285031515e-06)

Calculate yearly mean & std per cluster

  1. Shipping route
In [109]:
## Prepare data for shipping-route cluster
data_shipping = np.multiply(copy.deepcopy(no2_vcd)[:, :, :], no2_ConvGistar_shipping)
data_shipping_2019 = data_shipping[27:(27 + 365), :, :]
data_shipping_2020 = data_shipping[(27 + 365):(27 + 365 + 366), :, :]
data_shipping_2021 = data_shipping[(27 + 365 + 366):(27 + 365 + 366 + 366), :, :]

## 2019 NO2 column density
print('NO2 mean (2019):', round_sf(np.nanmean(data_shipping_2019), 4), '\nNO2 std (2019):', round_sf(np.nanstd(data_shipping_2019), 4))

## 2020 NO2 column density
print('NO2 mean (2020):', round_sf(np.nanmean(data_shipping_2020), 4), '\nNO2 std (2020):', round_sf(np.nanstd(data_shipping_2020), 4))

## 2021 NO2 column density
print('NO2 mean (2021):', round_sf(np.nanmean(data_shipping_2021), 4), '\nNO2 std (2021):', round_sf(np.nanstd(data_shipping_2021), 4))
NO2 mean (2019): 1.661e-05 
NO2 std (2019): 9.476e-06
NO2 mean (2020): 1.627e-05 
NO2 std (2020): 9.098e-06
NO2 mean (2021): 1.75e-05 
NO2 std (2021): 8.984e-06
In [110]:
## 2020/2019 ratio
ratio_shipping_2020_2019 = np.nanmean(data_shipping_2020, axis=0) / np.nanmean(data_shipping_2019, axis=0)
print('NO2 ratio mean (2020/2019):', round_sf(np.nanmean(ratio_shipping_2020_2019), 4), '\nNO2 ratio std (2020/2019):', round_sf(np.nanstd(ratio_shipping_2020_2019), 4))

## 2021/2020 ratio
ratio_shipping_2021_2020 = np.nanmean(data_shipping_2021, axis=0) / np.nanmean(data_shipping_2020, axis=0)
print('NO2 ratio mean (2021/2020):', round_sf(np.nanmean(ratio_shipping_2021_2020), 4), '\nNO2 ratio std (2021/2020):', round_sf(np.nanstd(ratio_shipping_2021_2020), 4))

## 2020/2019 ratio
ratio_shipping_2021_2019 = np.nanmean(data_shipping_2021, axis=0) / np.nanmean(data_shipping_2019, axis=0)
print('NO2 ratio mean (2021/2019):', round_sf(np.nanmean(ratio_shipping_2021_2019), 4), '\nNO2 ratio std (2021/2019):', round_sf(np.nanstd(ratio_shipping_2021_2019), 4))
NO2 ratio mean (2020/2019): 0.9799 
NO2 ratio std (2020/2019): 0.03766
NO2 ratio mean (2021/2020): 1.077 
NO2 ratio std (2021/2020): 0.0496
NO2 ratio mean (2021/2019): 1.055 
NO2 ratio std (2021/2019): 0.05015

Calculate yearly mean & std per cluster

  1. Background
In [111]:
## Prepare data for background cluster
data_background = np.multiply(copy.deepcopy(no2_vcd)[:, :, :], no2_ConvGistar_background)
data_background_2019 = data_background[27:(27 + 365), :, :]
data_background_2020 = data_background[(27 + 365):(27 + 365 + 366), :, :]
data_background_2021 = data_background[(27 + 365 + 366):(27 + 365 + 366 + 366), :, :]

## 2019 NO2 column density
print('NO2 mean (2019):', round_sf(np.nanmean(data_background_2019), 4), '\nNO2 std (2019):', round_sf(np.nanstd(data_background_2019), 4))

## 2020 NO2 column density
print('NO2 mean (2020):', round_sf(np.nanmean(data_background_2020), 4), '\nNO2 std (2020):', round_sf(np.nanstd(data_background_2020), 4))

## 2021 NO2 column density
print('NO2 mean (2021):', round_sf(np.nanmean(data_background_2021), 4), '\nNO2 std (2021):', round_sf(np.nanstd(data_background_2021), 4))
NO2 mean (2019): 1.16e-05 
NO2 std (2019): 5.712e-06
NO2 mean (2020): 1.168e-05 
NO2 std (2020): 6.087e-06
NO2 mean (2021): 1.273e-05 
NO2 std (2021): 5.837e-06
In [112]:
## 2020/2019 ratio
ratio_background_2020_2019 = np.nanmean(data_background_2020, axis=0) / np.nanmean(data_background_2019, axis=0)
print('NO2 ratio mean (2020/2019):', round_sf(np.nanmean(ratio_background_2020_2019), 4), '\nNO2 ratio std (2020/2019):', round_sf(np.nanstd(ratio_background_2020_2019), 4))

## 2021/2020 ratio
ratio_background_2021_2020 = np.nanmean(data_background_2021, axis=0) / np.nanmean(data_background_2020, axis=0)
print('NO2 ratio mean (2021/2020):', round_sf(np.nanmean(ratio_background_2021_2020), 4), '\nNO2 ratio std (2021/2020):', round_sf(np.nanstd(ratio_background_2021_2020), 4))

## 2020/2019 ratio
ratio_background_2021_2019 = np.nanmean(data_background_2021, axis=0) / np.nanmean(data_background_2019, axis=0)
print('NO2 ratio mean (2021/2019):', round_sf(np.nanmean(ratio_background_2021_2019), 4), '\nNO2 ratio std (2021/2019):', round_sf(np.nanstd(ratio_background_2021_2019), 4))
NO2 ratio mean (2020/2019): 1.008 
NO2 ratio std (2020/2019): 0.03899
NO2 ratio mean (2021/2020): 1.091 
NO2 ratio std (2021/2020): 0.05854
NO2 ratio mean (2021/2019): 1.099 
NO2 ratio std (2021/2019): 0.04806

Comparison with slant column density¶

In [113]:
## Load TROPOMI dataset (netCDF)
no2_nc_filename = "./dataset/cube_no2_shipping_RedSea.nc"

## Read netCDF file
ds = nc.Dataset(no2_nc_filename, 'r')

## Store dimension information (time, latitude, and longitude)
dim_names = list(ds.dimensions) # get variable keys
time, lats, lons = ds.variables[dim_names[0]][:], ds.variables[dim_names[1]][:], ds.variables[dim_names[2]][:]
print("Dimensions of netCDF NO2 data:")
for dim in ds.dimensions.items():
    print(dim)

## Store NO2 column measurements from netCDF dataset
product = ds['/PRODUCT/SUPPORT_DATA/DETAILED_RESULTS']
no2_scd_attr = list(product.variables.items())
no2_scd_name = no2_scd_attr[0][0]
no2_scd = product.variables[no2_scd_name][:len(time)] # vertical column

## Close netCDF file
ds.close()

## Crop the dataset and dimension w.r.t. the region of interest
## If interested in a smaller ROI, change lats_roi = (0, lats_roi), and lons_roi = (0, lons_roi) where lats_roi <= lats.size and lons_roi <= lons.size
lats_roi = (0, lats.size)
lons_roi = (0, lons.size)
lats, lons = lats[lats_roi[0]:lats_roi[1]], lons[lons_roi[0]:lons_roi[1]]
no2_scd = no2_scd[:, lats_roi[0]:lats_roi[1], lons_roi[0]:lons_roi[1]]
extent = [min(lons), max(lons), min(lats), max(lats)] # coordinate extent of given netCDF dataset

## only run when Indian Ocean
# no2_scd[330, :, 160:] = np.nan ## Part of Indian Ocean dataset on 29 October 2019 should be masked due to large noise

del dim_names, dim, ds, product, no2_scd_attr, no2_scd_name
Dimensions of netCDF NO2 data:
('Time', <class 'netCDF4._netCDF4.Dimension'> (unlimited): name = 'Time', size = 1072)
('Latitude', <class 'netCDF4._netCDF4.Dimension'>: name = 'Latitude', size = 320)
('Longitude', <class 'netCDF4._netCDF4.Dimension'>: name = 'Longitude', size = 320)

Calculate monthly statistics per clusters (e.g. shipping route vs. background)

In [114]:
## Calculate monthly mean & std
no2_monthly_mean_shipping = {}
no2_monthly_mean_background = {}
no2_monthly_std_shipping = {}
no2_monthly_std_background = {}
for i in range(len(month_index) - 1):
    no2_monthly_mean_shipping["{0}".format(i)] = np.nanmean(np.nanmean(np.multiply(copy.deepcopy(no2_scd)[month[str(i)], :, :], no2_ConvGistar_shipping), axis=0))
    no2_monthly_mean_background["{0}".format(i)] = np.nanmean(np.nanmean(np.multiply(copy.deepcopy(no2_scd)[month[str(i)], :, :], no2_ConvGistar_background), axis=0))
    no2_monthly_std_shipping["{0}".format(i)] = np.nanstd(np.nanmean(np.multiply(copy.deepcopy(no2_scd)[month[str(i)], :, :], no2_ConvGistar_shipping), axis=0))
    no2_monthly_std_background["{0}".format(i)] = np.nanstd(np.nanmean(np.multiply(copy.deepcopy(no2_scd)[month[str(i)], :, :], no2_ConvGistar_background), axis=0))

Plot the trend of monthly statistics in shipping route cluster, compared with the global container throughput index

In [115]:
## Prepare data
x = np.array(month_name)
y_ship = [*no2_monthly_mean_shipping.values()]
y_back = [*no2_monthly_mean_background.values()]
yerr_ship = [*no2_monthly_std_shipping.values()]
yerr_back = [*no2_monthly_std_background.values()]
global_throughput_index = pd.read_excel('https://www.isl.org/public/containerumschlag-index/2023-04/containerumschlag-index_230531.xlsx')
start = global_throughput_index.index[global_throughput_index.iloc[:, 0] == datetime.datetime(2018, 12, 1, 0, 0)].item()
end = global_throughput_index.index[global_throughput_index.iloc[:, 0] == datetime.datetime(2021, 11, 1, 0, 0)].item()
global_throughput_original = global_throughput_index.iloc[start:(end + 1), 1].tolist()

## Plot trend time series
fig, ax1 = plt.subplots(figsize=(14, 7))
plt.xticks(rotation = 90)
plt.ylabel("NO$_2$ slant column (mol m$^{-2}$)")
ax2 = ax1.twinx()
ax1.plot(x, y_ship, label='Monthly mean NO$_2$ slant column - shipping route', color='#440154')
ax1.errorbar(x, y_ship, yerr=yerr_ship, fmt='o', capsize=5, alpha=0.5, color='#440154')
ax1.ticklabel_format(axis='y', style='sci', scilimits=(-4, -4))
ax2.plot(x, global_throughput_original, label='RWI/ISL container throughput index - original', color='#21918c')
ax2.grid(False)
ax2.set_ylim([90, 140])
plt.ylabel("Container throughput index (index$_{2015}$=100)")
c1_patch = mpatches.Patch(color='#440154', label='Monthly mean NO$_2$ slant column - shipping route')
c2_patch = mpatches.Patch(color='#21918c', label='RWI/ISL container throughput index - original')
plt.legend(handles=[c1_patch, c2_patch], loc='upper right')
plt.tight_layout()
plt.show()

Perform seasonal decomposition on the data

In [116]:
## Perform seasonal decomposition
seasonal_decomp = statsmodels.tsa.seasonal.seasonal_decompose(y_ship, model='additive', period=12)

## Plot seasonality
plt.figure(figsize=(14, 7))
plt.plot(seasonal_decomp.seasonal)
plt.xticks(np.arange(0, 36), pd.Series(month_name), rotation = 90)
plt.legend(['Seasonality of monthly mean NO$_2$ slant column - shipping route'], loc='upper right')
plt.show()

## Plot deseasonalized NO2 data
plt.figure(figsize=(14, 7))
plt.plot(y_ship - seasonal_decomp.seasonal)
plt.xticks(np.arange(0, 36), pd.Series(month_name), rotation = 90)
plt.legend(['Deseasonalized monthly mean NO$_2$ slant column - shipping route'], loc='upper right')
plt.show()

Plot the trend of deseasonalized monthly statistics in shipping route cluster, compared with the global container throughput adjusted index

In [117]:
## Prepare data
x = np.array(month_name)
y_ship = [*no2_monthly_mean_shipping.values()]
y_back = [*no2_monthly_mean_background.values()]
yerr_ship = [*no2_monthly_std_shipping.values()]
yerr_back = [*no2_monthly_std_background.values()]
global_throughput_index = pd.read_excel('https://www.isl.org/public/containerumschlag-index/2023-04/containerumschlag-index_230531.xlsx')
start = global_throughput_index.index[global_throughput_index.iloc[:, 0] == datetime.datetime(2018, 12, 1, 0, 0)].item()
end = global_throughput_index.index[global_throughput_index.iloc[:, 0] == datetime.datetime(2021, 11, 1, 0, 0)].item()
global_throughput_adjusted = global_throughput_index.iloc[start:(end + 1), 2].tolist()

## Plot trend time series
fig, ax1 = plt.subplots(figsize=(14, 7))
plt.xticks(rotation = 90)
plt.ylabel("NO$_2$ slant column (mol m$^{-2}$)")
ax2 = ax1.twinx()
ax1.plot(x, y_ship - seasonal_decomp.seasonal, label='Deseasonalized monthly  mean NO$_2$ slant column - shipping route', color='#440154')
ax1.errorbar(x, y_ship - seasonal_decomp.seasonal, yerr=yerr_ship, fmt='o', capsize=5, alpha=0.5, color='#440154')
ax1.ticklabel_format(axis='y', style='sci', scilimits=(-4, -4))
ax2.plot(x, global_throughput_adjusted, label='RWI/ISL container throughput index - adjusted', color='#21918c')
ax2.grid(False)
ax2.set_ylim([90, 140])
plt.ylabel("Container throughput index (index$_{2015}$=100)")
c1_patch = mpatches.Patch(color='#440154', label='Deseasonalized monthly mean NO$_2$ slant column - shipping route')
c2_patch = mpatches.Patch(color='#21918c', label='RWI/ISL container throughput index - adjusted')
plt.legend(handles=[c1_patch, c2_patch], loc='upper right')
plt.tight_layout()
plt.show()

Perform statistical analysis

In [118]:
## Monthly mean NO2 on shipping route vs. global throughput index
## Pearson correlation
print('Temporal Pearson correlation between shipping & throughput index (original):', stats.pearsonr(y_ship, global_throughput_original))
print('Temporal Pearson correlation between deseasonalized shipping & throughput index (adjusted):', stats.pearsonr(y_ship - seasonal_decomp.seasonal, global_throughput_adjusted))
Temporal Pearson correlation between shipping & throughput index (original): PearsonRResult(statistic=0.6042468732311803, pvalue=9.519631104389306e-05)
Temporal Pearson correlation between deseasonalized shipping & throughput index (adjusted): PearsonRResult(statistic=0.7057877159188211, pvalue=1.5235283878666406e-06)

Calculate yearly mean & std per cluster

  1. Shipping route
In [119]:
## Prepare data for shipping-route cluster
data_shipping = np.multiply(copy.deepcopy(no2_scd)[:, :, :], no2_ConvGistar_shipping)
data_shipping_2019 = data_shipping[27:(27 + 365), :, :]
data_shipping_2020 = data_shipping[(27 + 365):(27 + 365 + 366), :, :]
data_shipping_2021 = data_shipping[(27 + 365 + 366):(27 + 365 + 366 + 366), :, :]

## 2019 NO2 column density
print('NO2 mean (2019):', round_sf(np.nanmean(data_shipping_2019), 5), '\nNO2 std (2019):', round_sf(np.nanstd(data_shipping_2019), 4))

## 2020 NO2 column density
print('NO2 mean (2020):', round_sf(np.nanmean(data_shipping_2020), 5), '\nNO2 std (2020):', round_sf(np.nanstd(data_shipping_2020), 4))

## 2021 NO2 column density
print('NO2 mean (2021):', round_sf(np.nanmean(data_shipping_2021), 5), '\nNO2 std (2021):', round_sf(np.nanstd(data_shipping_2021), 4))
NO2 mean (2019): 0.00012139 
NO2 std (2019): 2.194e-05
NO2 mean (2020): 0.00011606 
NO2 std (2020): 1.815e-05
NO2 mean (2021): 0.00012652 
NO2 std (2021): 1.942e-05
In [120]:
## 2020/2019 ratio
ratio_shipping_2020_2019 = np.nanmean(data_shipping_2020, axis=0) / np.nanmean(data_shipping_2019, axis=0)
print('NO2 ratio mean (2020/2019):', round_sf(np.nanmean(ratio_shipping_2020_2019), 4), '\nNO2 ratio std (2020/2019):', round_sf(np.nanstd(ratio_shipping_2020_2019), 4))

## 2021/2020 ratio
ratio_shipping_2021_2020 = np.nanmean(data_shipping_2021, axis=0) / np.nanmean(data_shipping_2020, axis=0)
print('NO2 ratio mean (2021/2020):', round_sf(np.nanmean(ratio_shipping_2021_2020), 4), '\nNO2 ratio std (2021/2020):', round_sf(np.nanstd(ratio_shipping_2021_2020), 4))

## 2020/2019 ratio
ratio_shipping_2021_2019 = np.nanmean(data_shipping_2021, axis=0) / np.nanmean(data_shipping_2019, axis=0)
print('NO2 ratio mean (2021/2019):', round_sf(np.nanmean(ratio_shipping_2021_2019), 4), '\nNO2 ratio std (2021/2019):', round_sf(np.nanstd(ratio_shipping_2021_2019), 4))
NO2 ratio mean (2020/2019): 0.9556 
NO2 ratio std (2020/2019): 0.007674
NO2 ratio mean (2021/2020): 1.09 
NO2 ratio std (2021/2020): 0.01014
NO2 ratio mean (2021/2019): 1.041 
NO2 ratio std (2021/2019): 0.00885

Calculate yearly mean & std per cluster

  1. Background
In [121]:
## Prepare data for background cluster
data_background = np.multiply(copy.deepcopy(no2_scd)[:, :, :], no2_ConvGistar_background)
data_background_2019 = data_background[27:(27 + 365), :, :]
data_background_2020 = data_background[(27 + 365):(27 + 365 + 366), :, :]
data_background_2021 = data_background[(27 + 365 + 366):(27 + 365 + 366 + 366), :, :]

## 2019 NO2 column density
print('NO2 mean (2019):', round_sf(np.nanmean(data_background_2019), 5), '\nNO2 std (2019):', round_sf(np.nanstd(data_background_2019), 4))

## 2020 NO2 column density
print('NO2 mean (2020):', round_sf(np.nanmean(data_background_2020), 5), '\nNO2 std (2020):', round_sf(np.nanstd(data_background_2020), 4))

## 2021 NO2 column density
print('NO2 mean (2021):', round_sf(np.nanmean(data_background_2021), 5), '\nNO2 std (2021):', round_sf(np.nanstd(data_background_2021), 4))
NO2 mean (2019): 0.000115 
NO2 std (2019): 1.969e-05
NO2 mean (2020): 0.00011017 
NO2 std (2020): 1.663e-05
NO2 mean (2021): 0.00012118 
NO2 std (2021): 1.839e-05
In [122]:
## 2020/2019 ratio
ratio_background_2020_2019 = np.nanmean(data_background_2020, axis=0) / np.nanmean(data_background_2019, axis=0)
print('NO2 ratio mean (2020/2019):', round_sf(np.nanmean(ratio_background_2020_2019), 4), '\nNO2 ratio std (2020/2019):', round_sf(np.nanstd(ratio_background_2020_2019), 4))

## 2021/2020 ratio
ratio_background_2021_2020 = np.nanmean(data_background_2021, axis=0) / np.nanmean(data_background_2020, axis=0)
print('NO2 ratio mean (2021/2020):', round_sf(np.nanmean(ratio_background_2021_2020), 4), '\nNO2 ratio std (2021/2020):', round_sf(np.nanstd(ratio_background_2021_2020), 4))

## 2020/2019 ratio
ratio_background_2021_2019 = np.nanmean(data_background_2021, axis=0) / np.nanmean(data_background_2019, axis=0)
print('NO2 ratio mean (2021/2019):', round_sf(np.nanmean(ratio_background_2021_2019), 4), '\nNO2 ratio std (2021/2019):', round_sf(np.nanstd(ratio_background_2021_2019), 4))
NO2 ratio mean (2020/2019): 0.9574 
NO2 ratio std (2020/2019): 0.007282
NO2 ratio mean (2021/2020): 1.099 
NO2 ratio std (2021/2020): 0.008152
NO2 ratio mean (2021/2019): 1.052 
NO2 ratio std (2021/2019): 0.00759